#coding=utf-8
import numpy as np

def softmax_loss_native(W, X, y, reg):
    '''
    Softmax_loss的暴力实现，利用了for循环
    输入的维度是D，有C个分类类别，并且我们在有N个例子的batch上进行操作
    输入:
    - W: 一个numpy array，形状是(D, C)，代表权重
    - X: 一个形状为(N, D)为numpy array，代表输入数据
    - y: 一个形状为(N,)的numpy array，代表类别标签
    - reg: (float)正则化参数
    f返回:
    - 一个浮点数代表Loss
    - 和W形状一样的梯度
    '''
    loss = 0.0
    dW = np.zeros_like(W) #dW代表W反向传播的梯度
    num_classes = W.shape[1]
    num_train = X.shape[0]
    for i in xrange(num_train):
        scores = X[i].dot(W)
        shift_scores = scores - max(scores) #防止数值不稳定
        loss_i = -shift_scores[y[i]] + np.log(sum(np.exp(shift_scores)))
        loss += loss_i
        for j in xrange(num_classes):
            softmax_output = np.exp(shift_scores[j]) / sum(np.exp(shift_scores))
            if j == y[i]:
                dW[:, j] += (-1 + softmax_output) * X[i]
            else:
                dW[:, j] += softmax_output * X[i]
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dW = dW / num_train + reg * W
    return loss, dW

def softmax_loss_vectorized(W, X, y, reg):
    loss = 0.0
    dW = np.zeros_like(W)
    num_class = W.shape[1]
    num_train = X.shape[0]
    scores = X.dot(W)
    shift_scores = scores - np.max(scores, axis=1).reshape(-1, 1)
    softmax_output  = np.exp(shift_scores) / np.sum(np.exp(shift_scores), axis=1).reshape(-1, 1)
    loss = -np.sum(np.log(softmax_output[range(num_train), list(y)]))
    loss /= num_train
    loss += 0.5 * reg * np.sum(W * W)
    dS = softmax_output.copy()
    dS[range(num_train), list(y)] += -1
    dW = (x.T).dot(dS)
    dW = dW/num_train + reg*W
    return loss, dW


